---
title: "Rake"
author: "Alvaro Gonzalez"
date: "1/2/2021"
output: html_document
---

```{r,warning=FALSE,include=FALSE}
# Start from an empty workspace, then attach the packages used below.
# NOTE(review): rm(list = ls()) removes objects from the current environment
# only; it does not detach packages or reset options, so it is not a substitute
# for knitting in a fresh R session.
rm(list = ls()) #Clean environment


# dplyr and ggplot2 are also attached by tidyverse; the explicit calls are
# redundant but harmless.
library(dplyr)
library(tidyverse)
# survey provides svydesign() and rake(), used by fn.rake().
library(survey)
# reshape2 provides melt(), used to reshape the plotting tables.
library(reshape2)
library(ggplot2)
library(grid)
# gridExtra provides grid.arrange(), used to lay out the four panels.
library(gridExtra)
```

```{r,warning=FALSE,include=FALSE}
# Evaluate every expression in functions.R in the current environment.
# parse() is used with an explicit encoding instead of source().
# NOTE(review): presumably this file defines fn.age, fn.education, fn.gender
# and fn.regions used by fn.cleaner() - confirm.
eval(parse("functions.R", encoding="UTF-8"))
#source("functions.R",encoding = "utf-8")
# NOTE(review): presumably Quotas.R defines the census tables referenced below
# (AUS.age, AUS.education, ...) - confirm.
source("Quotas.R",encoding = "utf-8")
```

```{r,warning=FALSE,include=FALSE}

# Read one survey export per country into a tibble.
# NOTE(review): the paths are absolute and machine-specific, so this chunk only
# runs on the original author's computer; consider a project-relative data
# directory.
# Object names do not always match the file suffix (BRA <- data_BR, CHI <-
# data_CHL, FRA <- data_FR, ITA <- data_IT, SPA <- data_SP).
AUS <- read_csv("C:/Users/agonzalez.DPLATAM/Dropbox/Vaccine Study/Github/CANDOUR/data/data_AUS.csv")
BRA <- read_csv("C:/Users/agonzalez.DPLATAM/Dropbox/Vaccine Study/Github/CANDOUR/data/data_BR.csv")
CAN <- read_csv("C:/Users/agonzalez.DPLATAM/Dropbox/Vaccine Study/Github/CANDOUR/data/data_CAN.csv")
CHI <- read_csv("C:/Users/agonzalez.DPLATAM/Dropbox/Vaccine Study/Github/CANDOUR/data/data_CHL.csv")
CHN <- read_csv("C:/Users/agonzalez.DPLATAM/Dropbox/Vaccine Study/Github/CANDOUR/data/data_CHN.csv")
COL <- read_csv("C:/Users/agonzalez.DPLATAM/Dropbox/Vaccine Study/Github/CANDOUR/data/data_COL.csv")
FRA <- read_csv("C:/Users/agonzalez.DPLATAM/Dropbox/Vaccine Study/Github/CANDOUR/data/data_FR.csv")
IND <- read_csv("C:/Users/agonzalez.DPLATAM/Dropbox/Vaccine Study/Github/CANDOUR/data/data_IND.csv")
ITA <- read_csv("C:/Users/agonzalez.DPLATAM/Dropbox/Vaccine Study/Github/CANDOUR/data/data_IT.csv")
SPA <- read_csv("C:/Users/agonzalez.DPLATAM/Dropbox/Vaccine Study/Github/CANDOUR/data/data_SP.csv")
UGA <- read_csv("C:/Users/agonzalez.DPLATAM/Dropbox/Vaccine Study/Github/CANDOUR/data/data_UGA.csv")
UK <-  read_csv("C:/Users/agonzalez.DPLATAM/Dropbox/Vaccine Study/Github/CANDOUR/data/data_UK.csv")
US <-  read_csv("C:/Users/agonzalez.DPLATAM/Dropbox/Vaccine Study/Github/CANDOUR/data/data_US.csv")
  
```


```{r,warning=FALSE,include=FALSE}

# Runs the four recoding helpers over a survey data frame, in a fixed order:
# fn.age, fn.education, fn.gender, fn.regions. Each helper receives the output
# of the previous one.
#
# dataframe: a raw country survey data frame.
# Returns: the data frame produced by the last helper.
fn.cleaner <- function(dataframe) {
  recoding_steps <- list(fn.age, fn.education, fn.gender, fn.regions)
  for (recode in recoding_steps) {
    dataframe <- recode(dataframe)
  }
  dataframe
}

# Recode every country data set in place: each object is replaced by its
# cleaned version, so the raw import is no longer available after this chunk.
AUS <- fn.cleaner(AUS)
BRA <- fn.cleaner(BRA)
CAN <- fn.cleaner(CAN)
CHI <- fn.cleaner(CHI)
CHN <- fn.cleaner(CHN)
COL <- fn.cleaner(COL)
FRA <- fn.cleaner(FRA)
IND <- fn.cleaner(IND)
ITA <- fn.cleaner(ITA)
SPA <- fn.cleaner(SPA)
UGA <- fn.cleaner(UGA)
UK  <- fn.cleaner(UK)
US <-  fn.cleaner(US)  
```

```{r,warning=FALSE,include=FALSE}
# Keeps only respondents with a usable value on all four raking variables and
# returns just the columns the raking step needs.
#
# A value is unusable when it is a real missing value (NA) or the literal
# string "NA". Both cases are tested explicitly: comparing a real NA with
# "NA" yields NA rather than TRUE/FALSE, and indexing rows with NA silently
# creates all-NA rows instead of dropping them.
#
# dataframe: data frame (or tibble) containing at least the columns id,
#   REGION_0, age, gender and education.
# Returns: the rows of `dataframe` that are complete on REGION_0, gender, age
#   and education, restricted to the columns id, REGION_0, age, gender,
#   education (in that order).
# Errors: stops if any of the required columns is absent.
fn.dropNA <- function(dataframe) {
  raking_columns <- c("REGION_0", "age", "gender", "education")
  output_columns <- c("id", raking_columns)

  absent <- setdiff(output_columns, names(dataframe))
  if (length(absent) > 0) {
    stop("fn.dropNA: missing column(s): ", paste(absent, collapse = ", "),
         call. = FALSE)
  }

  keep <- rep(TRUE, nrow(dataframe))
  for (column in raking_columns) {
    values <- dataframe[[column]]
    # !is.na() comes first so a real NA gives FALSE instead of propagating.
    keep <- keep & !is.na(values) & values != "NA"
  }

  #dataframe$ids <- as.numeric(rownames(dataframe))
  dataframe[keep, output_columns, drop = FALSE]
}

```

```{r}
# Rescales the Freq column of a quota table so that it is expressed as a
# percentage of the column total, rounded to two decimals.
#
# dataframe: a table with a numeric Freq column.
# Returns: the same table with Freq replaced by the rounded percentages.
fn.replace.quotas <- function(dataframe) {
  total <- sum(dataframe$Freq)
  share <- dataframe$Freq / total
  dataframe[["Freq"]] <- round(share * 100, digits = 2)
  dataframe
}
```

```{r,warning=FALSE,include=FALSE}
    
# Collect the census quota tables into one named list, keyed
# "<COUNTRY>.<margin>" (for example "AUS.age"), and convert every Freq column
# to percentages with fn.replace.quotas().
# The names are generated rather than typed out twice; the order is country by
# country, with the margins age, education, gender, region inside each country.
Quotas_list_df <- local({
  countries <- c("AUS", "BRA", "CAN", "CHI", "CHN", "COL", "FRA",
                 "IND", "ITA", "SPA", "UGA", "UK", "US")
  margins <- c("age", "education", "gender", "region")
  quota_names <- paste(rep(countries, each = length(margins)), margins, sep = ".")
  # inherits = TRUE finds the tables wherever the surrounding code can see
  # them, exactly as referring to them by name would.
  quota_tables <- mget(quota_names, envir = environment(), inherits = TRUE)
  lapply(quota_tables, fn.replace.quotas)
})
```

# Drop computation
```{r}
# Reduced copies of each country data set: complete cases on the raking
# variables, restricted to id, REGION_0, age, gender and education.
# NOTE(review): none of these objects is referenced again in this file
# (fn.rake() calls fn.dropNA() itself), and the naming is inconsistent
# ("fra2" is lower-case) - confirm they are still needed.
Aus2 <- fn.dropNA(AUS)
Bra2 <- fn.dropNA(BRA)
Can2 <- fn.dropNA(CAN)
Chi2 <- fn.dropNA(CHI)
Chn2 <- fn.dropNA(CHN)
Col2 <- fn.dropNA(COL)
fra2 <- fn.dropNA(FRA)
Ind2 <- fn.dropNA(IND)
Ita2 <- fn.dropNA(ITA)
Spa2 <- fn.dropNA(SPA)
Uga2 <- fn.dropNA(UGA)
UK2  <- fn.dropNA(UK)
US2 <-fn.dropNA(US) 
```


```{r,warning=FALSE,include=FALSE}

# Rakes a survey to one or more census margins and returns the complete-case
# data with the raked weight of every respondent.
#
# dataframe: cleaned country survey; reduced with fn.dropNA() before raking.
# list_of_variables: list of one-sided formulas naming the sample margins,
#   e.g. list(~education, ~age).
# list_of_quotas: list of population tables, one per formula, each holding the
#   margin variable(s) and a Freq column.
# Returns: the fn.dropNA() data frame with an extra numeric `weights` column.
fn.rake <- function(dataframe, list_of_variables, list_of_quotas) {
  dataframe <- fn.dropNA(dataframe)
  # No weights are supplied, so svydesign() assumes equal probabilities (and
  # warns about it).
  design <- svydesign(id = ~id, data = dataframe)
  raked_design <- rake(design, list_of_variables, list_of_quotas)
  # Take the final weights through the public accessor. The previous code read
  # attr(raked_design$postStrata[[1]][[1]], "weights"), an internal record
  # written after the FIRST margin was adjusted; with two or more margins that
  # is not the final raked weight.
  dataframe$weights <- as.numeric(stats::weights(raked_design))
  dataframe
}

```



```{r}
# Builds one panel: bars with the unweighted and the weighted distribution of
# `column`, and points with the census quota for the same variable.
#
# survey: data frame holding `column` and a numeric `weights` column.
# column: name of the variable to plot; it must also be a column of `census`.
# census: quota table with the column `column` and a Freq column.
# title: panel title.
# Returns: a ggplot object.
fn.graphs.panel <- function(survey, column, census, title) {
  groups <- factor(survey[[column]])
  bins <- levels(groups)

  # Unweighted share of each category, in percent (missing values are ignored).
  counts <- as.vector(table(groups))
  unweighted <- (counts / sum(counts)) * 100
  # Sum of the weights per category. Both series are computed from the same
  # factor, so they are aligned by construction.
  weighted <- as.vector(tapply(survey$weights, groups, sum))

  series <- c("unweighted", "weighted")
  bars <- data.frame(
    bin = factor(rep(bins, times = length(series)), levels = bins),
    variable = factor(rep(series, each = length(bins)), levels = series),
    value = c(unweighted, weighted)
  )
  targets <- data.frame(bin = census[[column]], Freq = census$Freq,
                        stringsAsFactors = FALSE)

  ggplot() +
    geom_bar(aes(x = bin, y = value, fill = variable), data = bars,
             stat = "identity", position = "dodge", width = 0.4) +
    scale_fill_manual("Result\n", values = c("red", "blue"),
                      labels = c("No weights", "Weights")) +
    labs(x = "\nBins", y = "Percentage\n") +
    theme_bw(base_size = 6) +
    ggtitle(title) +
    geom_point(data = targets, aes(x = bin, y = Freq))
}

# Plots the sample distribution of age, gender, region and education before
# and after weighting, against the census quotas.
#
# dataframe_with_weights: output of fn.rake(): columns age, gender, REGION_0,
#   education and weights. The bars labelled "Weights" are sums of weights, so
#   they read as percentages only when the weights add up to 100.
# printfile: "yes" writes the four panels as a 2 x 2 grid to a PDF file; any
#   other value prints the four panels one after another on the current device.
# list_of_census_a_e_g_r: list of four quota tables in the order age,
#   education, gender, region.
# pdf_name: optional output file name. When NULL the name is derived from the
#   expression passed as `dataframe_with_weights`: a plain object name `x`
#   gives "x.pdf"; a call is collapsed to one string and every run of
#   characters other than letters, digits, ".", "_" and "-" becomes "_".
# Returns: invisibly, the list of the four ggplot objects.
fn.graphs <- function(dataframe_with_weights, printfile, list_of_census_a_e_g_r,
                      pdf_name = NULL) {
  # deparse() may return several strings for a long call; collapse them so that
  # exactly one file name is produced.
  source_label <- paste(deparse(substitute(dataframe_with_weights)), collapse = "")

  panels <- list(
    age = fn.graphs.panel(dataframe_with_weights, "age",
                          list_of_census_a_e_g_r[[1]], "Age"),
    gender = fn.graphs.panel(dataframe_with_weights, "gender",
                             list_of_census_a_e_g_r[[3]], "Gender"),
    region = fn.graphs.panel(dataframe_with_weights, "REGION_0",
                             list_of_census_a_e_g_r[[4]], "Region"),
    education = fn.graphs.panel(dataframe_with_weights, "education",
                                list_of_census_a_e_g_r[[2]], "Education")
  )

  if (printfile == "yes") {
    if (is.null(pdf_name)) {
      safe_label <- gsub("[^A-Za-z0-9._-]+", "_", source_label)
      pdf_name <- paste0(substr(safe_label, 1, 100), ".pdf")
    }
    pdf(pdf_name)
    # Close the device even if drawing fails, so no PDF device is left open.
    on.exit(dev.off(), add = TRUE)
    grid.arrange(panels$age, panels$gender, panels$region, panels$education,
                 nrow = 2, ncol = 2, top = "Raking Graphs")
  } else {
    for (panel in panels) {
      print(panel)
    }
  }

  invisible(panels)
}



```

```{r}
# Attaches the raked weights to every respondent of the cleaned survey.
# Raking uses complete cases only, so some respondents come back without a
# weight; they receive 100 / (number of rows), and the whole column is then
# rescaled to add up to 100.
#
# dataframe: cleaned country survey with an id column.
# list_of_variables, list_of_quotas: passed unchanged to fn.rake().
# Returns: data frame with the columns id and weights.
fn.merge <- function(dataframe, list_of_variables, list_of_quotas) {
  raked <- fn.rake(dataframe, list_of_variables, list_of_quotas)
  # Left join: keep every respondent, matched or not.
  joined <- merge(dataframe, raked, by = "id", all.x = TRUE)
  Survey_with_weights <- joined[, c("id", "weights")]

  fallback_weight <- 100 / nrow(Survey_with_weights)
  filled <- replace(Survey_with_weights$weights,
                    is.na(Survey_with_weights$weights),
                    fallback_weight)
  Survey_with_weights$weights <- (filled / sum(filled)) * 100
  Survey_with_weights
}
```

### AUSTRALIA
```{r}
# Usage for every country chunk below:
#  - pass the raking variables as a list of one-sided formulas (~REGION_0,
#    ~age, ~education, ~gender) and the matching census tables from
#    Quotas_list_df, in the same order;
#  - pass 'yes' to fn.graphs() to write the plots to a PDF file; any other
#    value prints them in this document.
# fn.rake() is run twice here: once inside fn.merge() and once for the plots.

AUS_w <- fn.merge(AUS,list(~education),list(Quotas_list_df$AUS.education))
fn.graphs(fn.rake(AUS,list(~education),list(Quotas_list_df$AUS.education)),'no',list(Quotas_list_df$AUS.age,Quotas_list_df$AUS.education,Quotas_list_df$AUS.gender,Quotas_list_df$AUS.region))
#write.csv(AUS_w,'weights/AUS_w.csv')
```

```{r}
# Chile: rake on education and age, write the comparison plots to a PDF and
# save the weights. The PDF name is derived from the literal expression passed
# to fn.graphs(), so rewording that argument renames the file.
CHI_w <- fn.merge(CHI,list(~education,~age),list(Quotas_list_df$CHI.education,Quotas_list_df$CHI.age))
fn.graphs(fn.rake(CHI,list(~education,~age),list(Quotas_list_df$CHI.education,Quotas_list_df$CHI.age)),'yes',list(Quotas_list_df$CHI.age,Quotas_list_df$CHI.education,Quotas_list_df$CHI.gender,Quotas_list_df$CHI.region))
write.csv(CHI_w,'weights/CHI_w.csv')
```

## BRAZIL

```{r}
# Brazil: rake on education only, write the comparison plots to a PDF and save
# the weights.
BRA_w <- fn.merge(BRA,list(~education),list(Quotas_list_df$BRA.education))
fn.graphs(fn.rake(BRA,list(~education),list(Quotas_list_df$BRA.education)),'yes',list(Quotas_list_df$BRA.age,Quotas_list_df$BRA.education,Quotas_list_df$BRA.gender,Quotas_list_df$BRA.region))
write.csv(BRA_w,'weights/BRA_w.csv')
```
## CANADA, skip

```{r}
# Canada is not raked: this chunk only plots the unweighted sample margins
# against the census quotas and writes the four panels to CANweights.pdf.

# One panel: bars with the unweighted percentage of each category of `column`
# in `survey`, points with the census quota. `palette` is passed on to
# scale_fill_manual(); only its first colour is used because there is a single
# series.
fn.unweighted.panel <- function(survey, column, census, title, palette) {
  shares <- as.data.frame(xtabs(reformulate(column), survey))
  shares$Freq <- (shares$Freq / sum(shares$Freq)) * 100
  bars <- data.frame(bin = shares[[column]],
                     variable = factor("unweighted"),
                     value = shares$Freq)
  targets <- data.frame(bin = census[[column]], Freq = census$Freq,
                        stringsAsFactors = FALSE)
  ggplot() +
    geom_bar(aes(x = bin, y = value, fill = variable), data = bars,
             stat = "identity", position = "dodge", width = 0.4) +
    scale_fill_manual("Result\n", values = palette,
                      labels = c("No weights")) +
    labs(x = "\nBins", y = "Percentage\n") +
    theme_bw(base_size = 6) +
    ggtitle(title) +
    geom_point(data = targets, aes(x = bin, y = Freq))
}

# Census tables in the order age, education, gender, region.
list_of_census_a_e_g_r <- unname(
  Quotas_list_df[paste0("CAN.", c("age", "education", "gender", "region"))]
)
c_age <- list_of_census_a_e_g_r[[1]]
c_edu <- list_of_census_a_e_g_r[[2]]
c_gen <- list_of_census_a_e_g_r[[3]]
c_reg <- list_of_census_a_e_g_r[[4]]

# From here on CAN holds only the complete cases and the five raking columns.
CAN <- fn.dropNA(CAN)

f1 <- fn.unweighted.panel(CAN, "age", c_age, "Age", c("red", "blue", "green4"))
f2 <- fn.unweighted.panel(CAN, "gender", c_gen, "Gender", c("red"))
f3 <- fn.unweighted.panel(CAN, "REGION_0", c_reg, "Region", c("red"))
f4 <- fn.unweighted.panel(CAN, "education", c_edu, "Education",
                          c("red", "blue", "green4"))

pdf_name <- paste0("CANweights", ".pdf")
pdf(pdf_name)
grid.arrange(f1, f2, f3, f4, nrow = 2, ncol = 2, top = "Raking Graphs")
dev.off()
```


## China

```{r}
# China: rake on education and age, write the comparison plots to a PDF and
# save the weights.
CHN_w <- fn.merge(CHN,list(~education,~age),list(Quotas_list_df$CHN.education,Quotas_list_df$CHN.age))
fn.graphs(fn.rake(CHN,list(~education,~age),list(Quotas_list_df$CHN.education,Quotas_list_df$CHN.age)),'yes',list(Quotas_list_df$CHN.age,Quotas_list_df$CHN.education,Quotas_list_df$CHN.gender,Quotas_list_df$CHN.region))
write.csv(CHN_w,'weights/CHN_w.csv')
```
## COLOMBIA

```{r}
# Colombia: rake on education and age, print the comparison plots in this
# document ('no') and save the weights.
COL_w <- fn.merge(COL,list(~education,~age),list(Quotas_list_df$COL.education,Quotas_list_df$COL.age))
fn.graphs(fn.rake(COL,list(~education,~age),list(Quotas_list_df$COL.education,Quotas_list_df$COL.age)),'no',list(Quotas_list_df$COL.age,Quotas_list_df$COL.education,Quotas_list_df$COL.gender,Quotas_list_df$COL.region))
write.csv(COL_w,'weights/COL_w.csv')
```

### FRANCE

```{r}
# France: rake on education and gender, write the comparison plots to a PDF
# and save the weights.
FRA_w <- fn.merge(FRA,list(~education,~gender),list(Quotas_list_df$FRA.education,Quotas_list_df$FRA.gender))
fn.graphs(fn.rake(FRA,list(~education,~gender),list(Quotas_list_df$FRA.education,Quotas_list_df$FRA.gender)),'yes',list(Quotas_list_df$FRA.age,Quotas_list_df$FRA.education,Quotas_list_df$FRA.gender,Quotas_list_df$FRA.region))
write.csv(FRA_w,'weights/FRA_w.csv')
```

### Italy

```{r}
# Italy: rake on age and education, write the comparison plots to a PDF and
# save the weights.
ITA_w <- fn.merge(ITA,list(~age,~education),list(Quotas_list_df$ITA.age,Quotas_list_df$ITA.education))
fn.graphs(fn.rake(ITA,list(~age,~education),list(Quotas_list_df$ITA.age,Quotas_list_df$ITA.education)),'yes',list(Quotas_list_df$ITA.age,Quotas_list_df$ITA.education,Quotas_list_df$ITA.gender,Quotas_list_df$ITA.region))

write.csv(ITA_w,'weights/ITA_w.csv')
```
## Spain, skip


```{r}
# Spain is not raked: this chunk only plots the unweighted sample margins
# against the census quotas and writes the four panels to SPAweights.pdf.

# One panel: bars with the unweighted percentage of each category of `column`
# in `survey`, points with the census quota. `palette` is passed on to
# scale_fill_manual(); only its first colour is used because there is a single
# series.
fn.unweighted.panel <- function(survey, column, census, title, palette) {
  shares <- as.data.frame(xtabs(reformulate(column), survey))
  shares$Freq <- (shares$Freq / sum(shares$Freq)) * 100
  bars <- data.frame(bin = shares[[column]],
                     variable = factor("unweighted"),
                     value = shares$Freq)
  targets <- data.frame(bin = census[[column]], Freq = census$Freq,
                        stringsAsFactors = FALSE)
  ggplot() +
    geom_bar(aes(x = bin, y = value, fill = variable), data = bars,
             stat = "identity", position = "dodge", width = 0.4) +
    scale_fill_manual("Result\n", values = palette,
                      labels = c("No weights")) +
    labs(x = "\nBins", y = "Percentage\n") +
    theme_bw(base_size = 6) +
    ggtitle(title) +
    geom_point(data = targets, aes(x = bin, y = Freq))
}

# Census tables in the order age, education, gender, region.
list_of_census_a_e_g_r <- unname(
  Quotas_list_df[paste0("SPA.", c("age", "education", "gender", "region"))]
)
c_age <- list_of_census_a_e_g_r[[1]]
c_edu <- list_of_census_a_e_g_r[[2]]
c_gen <- list_of_census_a_e_g_r[[3]]
c_reg <- list_of_census_a_e_g_r[[4]]

# From here on SPA holds only the complete cases and the five raking columns.
SPA <- fn.dropNA(SPA)

f1 <- fn.unweighted.panel(SPA, "age", c_age, "Age", c("red", "blue", "green4"))
f2 <- fn.unweighted.panel(SPA, "gender", c_gen, "Gender", c("red"))
f3 <- fn.unweighted.panel(SPA, "REGION_0", c_reg, "Region", c("red"))
f4 <- fn.unweighted.panel(SPA, "education", c_edu, "Education",
                          c("red", "blue", "green4"))

pdf_name <- paste0("SPAweights", ".pdf")
pdf(pdf_name)
grid.arrange(f1, f2, f3, f4, nrow = 2, ncol = 2, top = "Raking Graphs")
dev.off()
```

```{r}
# United States: rake on age and education and write the comparison plots to a
# PDF. Unlike the other countries, the weights are not written to a CSV here.
US_w <- fn.merge(US,list(~age,~education),list(Quotas_list_df$US.age,Quotas_list_df$US.education))
fn.graphs(fn.rake(US,list(~age,~education),list(Quotas_list_df$US.age,Quotas_list_df$US.education)),'yes',list(Quotas_list_df$US.age,Quotas_list_df$US.education,Quotas_list_df$US.gender,Quotas_list_df$US.region))
```



### UK

```{r}
# United Kingdom: rake on age and education, write the comparison plots to a
# PDF and save the weights.
UK_w <- fn.merge(UK,list(~age,~education),list(Quotas_list_df$UK.age,Quotas_list_df$UK.education))
fn.graphs(fn.rake(UK,list(~age,~education),list(Quotas_list_df$UK.age,Quotas_list_df$UK.education)),'yes',list(Quotas_list_df$UK.age,Quotas_list_df$UK.education,Quotas_list_df$UK.gender,Quotas_list_df$UK.region))
write.csv(UK_w,'weights/UK_w.csv')
```
### INDIA

```{r}
# India is not raked: this chunk only plots the unweighted sample margins
# against the census quotas and writes the four panels to INDweights.pdf.
#
# The previous version was pasted from the Spain chunk without renaming: it
# stored the India data in the global object SPA and wrote the plots to
# SPAweights.pdf, overwriting Spain's output. The data now live in IND_clean
# and the file is named after India.

# One panel: bars with the unweighted percentage of each category of `column`
# in `survey`, points with the census quota.
fn.unweighted.panel <- function(survey, column, census, title) {
  shares <- as.data.frame(xtabs(reformulate(column), survey))
  shares$Freq <- (shares$Freq / sum(shares$Freq)) * 100
  bars <- data.frame(bin = shares[[column]],
                     variable = factor("unweighted"),
                     value = shares$Freq)
  targets <- data.frame(bin = census[[column]], Freq = census$Freq,
                        stringsAsFactors = FALSE)
  ggplot() +
    geom_bar(aes(x = bin, y = value, fill = variable), data = bars,
             stat = "identity", position = "dodge", width = 0.4) +
    scale_fill_manual("Result\n", values = c("red"),
                      labels = c("No weights")) +
    labs(x = "\nBins", y = "Percentage\n") +
    theme_bw(base_size = 6) +
    ggtitle(title) +
    geom_point(data = targets, aes(x = bin, y = Freq))
}

# Census tables in the order age, education, gender, region.
list_of_census_a_e_g_r <- list(Quotas_list_df$IND.age,
                               Quotas_list_df$IND.education,
                               Quotas_list_df$IND.gender,
                               Quotas_list_df$IND.region)
c_age <- list_of_census_a_e_g_r[[1]]
c_edu <- list_of_census_a_e_g_r[[2]]
c_gen <- list_of_census_a_e_g_r[[3]]
c_reg <- list_of_census_a_e_g_r[[4]]

# Complete cases only; kept in its own object so no other country is touched.
IND_clean <- fn.dropNA(IND)

f1 <- fn.unweighted.panel(IND_clean, "age", c_age, "Age")
f2 <- fn.unweighted.panel(IND_clean, "gender", c_gen, "Gender")
f3 <- fn.unweighted.panel(IND_clean, "REGION_0", c_reg, "Region")
f4 <- fn.unweighted.panel(IND_clean, "education", c_edu, "Education")

pdf_name <- paste0("INDweights", ".pdf")
pdf(pdf_name)
grid.arrange(f1, f2, f3, f4, nrow = 2, ncol = 2, top = "Raking Graphs")
dev.off()
```


### Uganda

```{r}
# Uganda is not raked: this chunk only plots the unweighted sample margins
# against the census quotas and writes the four panels to UGAweights.pdf.
#
# The previous version was pasted from the Spain chunk without renaming: it
# stored the Uganda data in the global object SPA and wrote the plots to
# SPAweights.pdf, overwriting Spain's output. The data now live in UGA_clean
# and the file is named after Uganda.

# One panel: bars with the unweighted percentage of each category of `column`
# in `survey`, points with the census quota.
fn.unweighted.panel <- function(survey, column, census, title) {
  shares <- as.data.frame(xtabs(reformulate(column), survey))
  shares$Freq <- (shares$Freq / sum(shares$Freq)) * 100
  bars <- data.frame(bin = shares[[column]],
                     variable = factor("unweighted"),
                     value = shares$Freq)
  targets <- data.frame(bin = census[[column]], Freq = census$Freq,
                        stringsAsFactors = FALSE)
  ggplot() +
    geom_bar(aes(x = bin, y = value, fill = variable), data = bars,
             stat = "identity", position = "dodge", width = 0.4) +
    scale_fill_manual("Result\n", values = c("red"),
                      labels = c("No weights")) +
    labs(x = "\nBins", y = "Percentage\n") +
    theme_bw(base_size = 6) +
    ggtitle(title) +
    geom_point(data = targets, aes(x = bin, y = Freq))
}

# Census tables in the order age, education, gender, region.
list_of_census_a_e_g_r <- list(Quotas_list_df$UGA.age,
                               Quotas_list_df$UGA.education,
                               Quotas_list_df$UGA.gender,
                               Quotas_list_df$UGA.region)
c_age <- list_of_census_a_e_g_r[[1]]
c_edu <- list_of_census_a_e_g_r[[2]]
c_gen <- list_of_census_a_e_g_r[[3]]
c_reg <- list_of_census_a_e_g_r[[4]]

# Complete cases only; kept in its own object so no other country is touched.
UGA_clean <- fn.dropNA(UGA)

f1 <- fn.unweighted.panel(UGA_clean, "age", c_age, "Age")
f2 <- fn.unweighted.panel(UGA_clean, "gender", c_gen, "Gender")
f3 <- fn.unweighted.panel(UGA_clean, "REGION_0", c_reg, "Region")
f4 <- fn.unweighted.panel(UGA_clean, "education", c_edu, "Education")

pdf_name <- paste0("UGAweights", ".pdf")
pdf(pdf_name)
grid.arrange(f1, f2, f3, f4, nrow = 2, ncol = 2, top = "Raking Graphs")
dev.off()
```



## US Presidential election

```{r}
# One row per US respondent with the raked weight (age and education margins)
# and the answer to question Q21.5.
US_Pres <- US %>%
  merge(
    fn.merge(US,
             list(~age, ~education),
             list(Quotas_list_df$US.age, Quotas_list_df$US.education)),
    by = "id",
    all.x = TRUE
  ) %>%
  select("id", "weights", "Q21.5")
```
# No Weights

```{r}
# Unweighted share of each Q21.5 answer among respondents who answered
# (xtabs() leaves out missing answers, so the shares add up to 1).
round(prop.table(xtabs(~Q21.5, US_Pres)), 3)
```
With weights

```{r}
# Weighted share of each Q21.5 answer: keep the respondents who answered,
# rescale their weights to add up to 1, and sum the weights per answer.
US_reweight_Pres <- US_Pres[!is.na(US_Pres$Q21.5), ]
US_reweight_Pres$weights <- US_reweight_Pres$weights / sum(US_reweight_Pres$weights)

# group_by() replaces the deprecated group_by_(.dots = ...).
US_reweight_Pres %>%
  group_by(Q21.5) %>%
  summarize(Sum = round(sum(weights), 3))
```

```{r}
# Scratch input for the next chunk: Australia raked on education only.
dataframe_with_weights<-fn.rake(AUS,list(~education),list(Quotas_list_df$AUS.education))
```


```{r}
# Stand-alone version of the age panel of fn.graphs() for Australia, printed
# in this document: unweighted percentages and sums of weights per age group
# as bars, the census age quota as points.
age_count <- as.data.frame(xtabs(~age, dataframe_with_weights))
age_count$Freq <- (age_count$Freq / sum(age_count$Freq)) * 100

# group_by() replaces the deprecated group_by_(.dots = ...).
age_w <- dataframe_with_weights %>%
  group_by(age) %>%
  summarize(sum = sum(weights))

# The two tables are combined by position, which assumes both list the age
# groups in the same order.
names_age <- age_count$age
data_age <- data.frame(names_age, age_count$Freq, age_w$sum)

datalong_age <- melt(data_age, id = c("names_age"))

# NOTE(review): `datos` is not used anywhere in this file - confirm it can go.
datos <- list(Quotas_list_df$AUS.age)

ggplot() +
  geom_bar(aes(x = names_age, y = value, fill = variable), data = datalong_age,
           stat = "identity", position = "dodge", width = 0.4) +
  geom_point(data = Quotas_list_df$AUS.age, aes(y = Freq, x = age)) +
  scale_fill_manual("Result\n", values = c("red", "blue"),
                    labels = c("No weights", "Weights")) +
  labs(x = "\nBins", y = "Percentage\n") +
  theme_bw(base_size = 6) +
  ggtitle("Age")
```



## New Graph for US


```{r}
# Population tables for the second US raking; the first column of each file is
# discarded.
US_joints2 <- read.csv("US_joints2.csv")[-1]
US_edu <- read.csv("US_edu.csv")[-1]
```



```{r}
# Same procedure as fn.rake(): rakes a survey to the given margins and returns
# the complete-case data with the raked weight of every respondent.
#
# dataframe: cleaned survey; reduced with fn.dropNA() before raking.
# list_of_variables: list of one-sided formulas naming the sample margins; a
#   formula may combine several variables (e.g. ~region + gender + age).
# list_of_quotas: list of population tables, one per formula, each with the
#   margin variable(s) and a Freq column.
# Returns: the fn.dropNA() data frame with an extra numeric `weights` column.
fn.rake2 <- function(dataframe, list_of_variables, list_of_quotas) {
  dataframe <- fn.dropNA(dataframe)
  # No weights are supplied, so svydesign() assumes equal probabilities (and
  # warns about it).
  design <- svydesign(id = ~id, data = dataframe)
  raked_design <- rake(design, list_of_variables, list_of_quotas)
  # Take the final weights through the public accessor. The previous code read
  # attr(raked_design$postStrata[[1]][[1]], "weights"), an internal record
  # written after the FIRST margin was adjusted; with two or more margins that
  # is not the final raked weight.
  dataframe$weights <- as.numeric(stats::weights(raked_design))
  dataframe
}
```


```{r}
 #<-fn.rake(US,list(~age,~education),list(Quotas_list_df$US.age,Quotas_list_df$US.education))
# Second US raking: joint region x gender x age margin plus education.
US_2 <- US
# NOTE(review): renames the 9th column by position; presumably that column is
# REGION_0 - confirm, and prefer renaming by name.
names(US_2)[9] <- "region"


# NOTE(review): fn.rake2() calls fn.dropNA(), which selects a column named
# REGION_0. As the original note below says, fn.dropNA() has to be edited by
# hand to use "region" before this chunk can run.
#Need to change to REGIOM_0 in the DROPNA function region
US_2 <- fn.rake2(US_2,list(~region+gender+age,~education),list(US_joints2,US_edu))
  
# Rescale the weights so that they add up to 100.
US_2$weights <- US_2$weights/sum(US_2$weights)  
US_2$weights <- (US_2$weights)*100
```

```{r}

# Plots for the second US raking (US_2): unweighted percentages and sums of
# the joint-raking weights per category as bars, census quotas as points. The
# four panels are written to US_w_2weights.pdf.
#
# Changes from the previous version:
#  - the legend had three labels and three colours for two series (the first
#    raking, US_1, was commented out), which ggplot2 rejects when drawing; the
#    remaining weighted series keeps its label "Weights Two" and its colour;
#  - the weighted sums no longer rely on the deprecated group_by_(), and both
#    series are computed from the same factor, so they are aligned by
#    construction;
#  - the commented-out US_1 code has been removed.

# One panel for `column` of `survey`; `census_column` names the matching
# column of the quota table `census`.
fn.us.panel <- function(survey, column, census, census_column, title) {
  groups <- factor(survey[[column]])
  bins <- levels(groups)

  counts <- as.vector(table(groups))
  unweighted <- (counts / sum(counts)) * 100
  weighted <- as.vector(tapply(survey$weights, groups, sum))

  series <- c("unweighted", "weighted")
  bars <- data.frame(
    bin = factor(rep(bins, times = length(series)), levels = bins),
    variable = factor(rep(series, each = length(bins)), levels = series),
    value = c(unweighted, weighted)
  )
  targets <- data.frame(bin = census[[census_column]], Freq = census$Freq,
                        stringsAsFactors = FALSE)

  ggplot() +
    geom_bar(aes(x = bin, y = value, fill = variable), data = bars,
             stat = "identity", position = "dodge", width = 0.4) +
    scale_fill_manual("Result\n", values = c("red", "green4"),
                      labels = c("No weights", "Weights Two")) +
    labs(x = "\nBins", y = "Percentage\n") +
    theme_bw(base_size = 6) +
    ggtitle(title) +
    geom_point(data = targets, aes(x = bin, y = Freq))
}

# Census tables in the order age, education, gender, region.
list_of_census_a_e_g_r <- list(Quotas_list_df$US.age,
                               Quotas_list_df$US.education,
                               Quotas_list_df$US.gender,
                               Quotas_list_df$US.region)
c_age <- list_of_census_a_e_g_r[[1]]
c_edu <- list_of_census_a_e_g_r[[2]]
c_gen <- list_of_census_a_e_g_r[[3]]
c_reg <- list_of_census_a_e_g_r[[4]]

f1 <- fn.us.panel(US_2, "age", c_age, "age", "Age")
f2 <- fn.us.panel(US_2, "gender", c_gen, "gender", "Gender")
# The survey column is called "region" in US_2; the census table still uses
# REGION_0.
f3 <- fn.us.panel(US_2, "region", c_reg, "REGION_0", "Region")
f4 <- fn.us.panel(US_2, "education", c_edu, "education", "Education")

pdf_name <- paste0("US_w_2weights", ".pdf")
pdf(pdf_name)
grid.arrange(f1, f2, f3, f4, nrow = 2, ncol = 2, top = "Raking Graphs")
dev.off()

```


